<?php

/**
 * 爬虫主程序
 * @author 暮雨秋晨
 * @copyright 2014
 */

/**
 * Simple breadth-first site crawler.
 *
 * Starting from a seed URL, each page is fetched through Grab, parsed with
 * Analyze, stored through Model when it has a title, and its links are
 * appended to the work queue until the queue is exhausted.
 */
class Crawler
{
    private $BaseURL; // base domain handed to Analyze (null when the seed URL has no host)
    private $URL_LIST = array(); // queue of URLs waiting to be processed
    private $Visited = array(); // set of URLs already taken from the queue (url => true)
    private $Handle_Count = 0; // number of pages stored so far

    /**
     * Queue the seed URL and derive the base domain from its host.
     *
     * The base domain is the last two dot-separated labels of the host
     * ("www.example.com" -> "example.com"). Hosts with two labels or fewer
     * and IP literals are used unchanged. This is a naive heuristic: it does
     * not know about multi-label public suffixes such as "co.uk".
     *
     * @param string $url Seed URL; it must include a scheme for parse_url()
     *                    to report a host.
     */
    public function __construct($url)
    {
        $this->URL_LIST[] = $url;

        $parts = parse_url($url);
        if (!is_array($parts) || !isset($parts['host']) || $parts['host'] === '') {
            // No usable host: BaseURL stays null, exactly as before.
            return;
        }

        $host = $parts['host'];
        $labels = explode('.', $host);
        $count = count($labels);

        if ($count > 2 && filter_var($host, FILTER_VALIDATE_IP) === false) {
            $this->BaseURL = $labels[$count - 2] . '.' . $labels[$count - 1];
        } else {
            $this->BaseURL = $host;
        }
    }

    /**
     * Process the queue until it is empty.
     *
     * Every URL is fetched at most once. Pages without a title are skipped
     * entirely (not stored, links not followed). For each stored page one
     * progress line "count<TAB>title<TAB>url" is echoed.
     *
     * @return void
     */
    public function run()
    {
        // NOTE(review): connection settings are hard-coded; consider injecting them.
        $db = new Model('mysql', '127.0.0.1', 'root', 'root', 'crawler');

        while (!empty($this->URL_LIST)) {
            $url = array_shift($this->URL_LIST);

            // Skip anything already crawled so mutually linked pages cannot
            // loop forever or be inserted twice.
            if (isset($this->Visited[$url])) {
                continue;
            }
            $this->Visited[$url] = true;

            $analyze = new Analyze(Grab::fetch($url, Grab::FETCH_CURL), $this->BaseURL);
            $title = $analyze->findTitle();
            if (empty($title)) {
                continue;
            }

            // NOTE(review): addslashes() is not a safe SQL escape. Confirm
            // whether Model::insert() binds parameters; if it does, these
            // calls double-escape and should be removed.
            $db->insert('records', array(
                'records_title' => addslashes($title),
                'records_content' => addslashes($analyze->fetchHtml()),
                'records_createtime' => time()));

            $links = $analyze->findLink();
            if (is_array($links)) {
                $this->enqueue($links);
            }

            $this->Handle_Count++;
            echo $this->Handle_Count . "\t" . $title . "\t" . $url . "\r\n";
        }
    }

    /**
     * Append newly discovered links to the queue, dropping duplicates and
     * URLs that were already crawled.
     *
     * Links are appended rather than merged with the "+" operator, because
     * array union on integer keys discards every link whose index is already
     * present in the queue.
     *
     * @param array $links Links found on the current page (presumably a list
     *                     of URL strings; non-string entries are ignored).
     * @return void
     */
    private function enqueue(array $links)
    {
        foreach ($links as $link) {
            if (is_string($link) && !isset($this->Visited[$link])) {
                $this->URL_LIST[] = $link;
            }
        }
        // Re-index so the queue stays a plain list after de-duplication.
        $this->URL_LIST = array_values(array_unique($this->URL_LIST));
    }
}

?>